1
2
|
import altair as alt
from vega_datasets import data
|
Incomplete notes based on this awesome tutorial by Jake VanderPlas, which I followed up to the 1:30:00 mark. To be continued if I start using Altair more in the future.
Vega datasets
1
2
3
|
# List available datasets
data.list_datasets()[:10]
|
['7zip',
'airports',
'annual-precip',
'anscombe',
'barley',
'birdstrikes',
'budget',
'budgets',
'burtin',
'cars']
1
2
3
|
# Get dataset details
data.airports.description
|
'This dataset lists US airports, including airport code, city, state, latitude, and longitude. This dataset is a subset of the data compiled and published at http://ourairports.com/data/, and is in the public domain.'
Quick overview
This and following sections are based on this tutorial.
1
2
|
cars = data.cars()
cars.head(2)
|
|
Name |
Miles_per_Gallon |
Cylinders |
Displacement |
Horsepower |
Weight_in_lbs |
Acceleration |
Year |
Origin |
| 0 |
chevrolet chevelle malibu |
18.0 |
8 |
307.0 |
130.0 |
3504 |
12.0 |
1970-01-01 |
USA |
| 1 |
buick skylark 320 |
15.0 |
8 |
350.0 |
165.0 |
3693 |
11.5 |
1970-01-01 |
USA |
1
2
3
|
# One point for each car plotted on top of each other
alt.Chart(cars).mark_point()
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon'
)
|
1
2
3
|
alt.Chart(cars).mark_line().encode(
x='Miles_per_Gallon'
)
|
1
2
3
|
alt.Chart(cars).mark_tick().encode(
x='Miles_per_Gallon'
)
|
1
2
3
4
|
alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower'
)
|
1
2
3
4
|
alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower'
).interactive()
|
1
2
3
|
alt.Chart(cars).mark_tick().encode(
x=alt.X('Miles_per_Gallon', bin=True)
)
|
1
2
3
4
5
|
# Get a histogram (boom!)
alt.Chart(cars).mark_bar().encode(
x=alt.X('Miles_per_Gallon', bin=True),
y='count()'
)
|
1
2
3
4
5
6
7
|
# Get a 2D histogram (boooooom!)
# This is the power of declarative grammar right there!
alt.Chart(cars).mark_bar().encode(
x=alt.X('Miles_per_Gallon', bin=True),
y=alt.Y('Horsepower', bin=True),
color='count()'
)
|
Leveraging the grammar of interaction
1
2
3
4
5
6
7
8
9
|
interval = alt.selection_interval()
alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower',
color='Origin'
).properties(
selection=interval
)
|
1
2
3
4
5
6
7
8
9
|
interval = alt.selection_interval(encodings=['x'])
alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower',
color='Origin'
).properties(
selection=interval
)
|
1
2
3
4
5
6
7
8
9
|
interval = alt.selection_interval(encodings=['x', 'y'])
alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower',
color=alt.condition(interval, 'Origin', alt.value('lightgray'))
).properties(
selection=interval
)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
# Select on the left and go from left to right
interval = alt.selection_interval(encodings=['x'])
chart = alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower',
color=alt.condition(interval, 'Origin', alt.value('lightgray')),
tooltip='Name'
).properties(
selection=interval
)
chart | chart.encode(x='Acceleration')
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
# Scatter plot with an x-axis interval selection, stacked vertically (`&`)
# above a bar chart of car counts per Origin.
# The bar chart does not reference the selection yet, so brushing only
# changes the point colors in the scatter plot.
interval = alt.selection_interval(encodings=['x'])
chart = alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower',
color=alt.condition(interval, 'Origin', alt.value('lightgray')),
tooltip='Name'
).properties(
selection=interval
)
hist = alt.Chart(cars).mark_bar().encode(
x='count()',
y='Origin',
color='Origin'
)
chart & hist
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
# Scatter plot with an x-axis interval selection, stacked vertically (`&`)
# above a bar chart of car counts per Origin.
# transform_filter(interval) restricts the bar chart to the rows that fall
# inside the brushed interval, so the counts follow the selection.
interval = alt.selection_interval(encodings=['x'])
chart = alt.Chart(cars).mark_point().encode(
x='Miles_per_Gallon',
y='Horsepower',
color=alt.condition(interval, 'Origin', alt.value('lightgray')),
tooltip='Name'
).properties(
selection=interval
)
hist = alt.Chart(cars).mark_bar().encode(
x='count()',
y='Origin',
color='Origin'
).transform_filter(
interval
)
chart & hist
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
import altair as alt
from vega_datasets import data
source = data.cars()
# Configure the options common to all layers
brush = alt.selection(type='interval')
base = alt.Chart(source).add_selection(brush)
# Configure the points
points = base.mark_point().encode(
x=alt.X('Miles_per_Gallon', title=''),
y=alt.Y('Horsepower', title=''),
color=alt.condition(brush, 'Origin', alt.value('grey'))
)
# Configure the ticks
tick_axis = alt.Axis(labels=False, domain=False, ticks=False)
x_ticks = base.mark_tick().encode(
alt.X('Miles_per_Gallon', axis=tick_axis),
alt.Y('Origin', title='', axis=tick_axis),
color=alt.condition(brush, 'Origin', alt.value('lightgrey'))
)
y_ticks = base.mark_tick().encode(
alt.X('Origin', title='', axis=tick_axis),
alt.Y('Horsepower', axis=tick_axis),
color=alt.condition(brush, 'Origin', alt.value('lightgrey'))
)
# Build the chart
y_ticks | (points & x_ticks)
|
Core concepts of simple charts
Three elements of a simple chart:
- Data, marks, and encodings: the three core pieces of an Altair plot.
- Encoding types (Q, N, O, T), which drive the visual representation of encodings.
- Binning and aggregation, which control aspects of data representation.
1
2
|
import altair as alt
from vega_datasets import data
|
Marks
- Many different marks available
- Use tab completion on alt.Chart.mark to see them all
Encodings
1
2
3
|
alt.Chart(cars).mark_point().encode(
x='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
y='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
color='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
opacity='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
shape='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
size='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
row='Origin'
)
|
1
2
3
|
alt.Chart(cars).mark_point().encode(
column='Origin'
)
|
1
2
3
4
5
6
7
|
# Stacked bar graph
alt.Chart(cars).mark_bar().encode(
y='Origin',
x='count()',
color='Cylinders'
)
|
1
2
3
4
5
6
|
alt.Chart(cars).mark_point().encode(
x='Displacement',
y='Horsepower',
shape='Origin',
color='Origin'
)
|
Datatypes
Datatypes are inferred, but explicitly thinking about and setting them is worth it.
1
2
3
4
5
|
alt.Chart(cars).mark_tick().encode(
x='Miles_per_Gallon:Q',
y='Origin:N',
color='Cylinders'
)
|
1
2
3
4
5
|
alt.Chart(cars).mark_tick().encode(
x='Miles_per_Gallon:Q',
y='Origin:N',
color='Cylinders:O'
)
|
Binning and aggregation
1
2
3
4
|
import altair as alt
from vega_datasets import data
cars = data.cars()
|
Aggregation functions implement the split-apply-combine sequence, just like pandas' groupby().
1
|
cars.groupby('Origin')['Miles_per_Gallon'].mean()
|
Origin
Europe 27.891429
Japan 30.450633
USA 20.083534
Name: Miles_per_Gallon, dtype: float64
1
2
3
4
|
alt.Chart(cars).mark_bar().encode(
x='mean(Miles_per_Gallon)',
y='Origin:N'
)
|
We can easily group data by Origin and Cylinders and apply the same mean aggregation:
1
2
3
4
5
6
|
alt.Chart(cars).mark_bar().encode(
x='mean(Miles_per_Gallon)',
y='Cylinders:O',
row='Origin',
color='Origin'
)
|
1
2
3
4
5
|
alt.Chart(cars).mark_bar().encode(
alt.X('Miles_per_Gallon', bin=True),
alt.Y('count()'),
alt.Color('Origin')
)
|
1
2
3
4
5
|
# Count of cars per Origin, with color encoding the binned Miles_per_Gallon
# (the bins are treated as nominal categories via the `:N` shorthand).
# The field name must match the DataFrame column exactly: 'Miles_per_Gallon'.
alt.Chart(cars).mark_bar().encode(
    alt.X('count()'),
    alt.Y('Origin'),
    alt.Color('Miles_per_Gallon:N', bin=True)
)
|
1
2
3
4
5
|
alt.Chart(cars).mark_bar().encode(
alt.Y('Origin'),
alt.X('Miles_per_Gallon', bin=alt.Bin(maxbins=20)),
alt.Color('count()')
)
|
Iris
1
2
|
iris = data.iris()
iris.head()
|
|
sepalLength |
sepalWidth |
petalLength |
petalWidth |
species |
| 0 |
5.1 |
3.5 |
1.4 |
0.2 |
setosa |
| 1 |
4.9 |
3.0 |
1.4 |
0.2 |
setosa |
| 2 |
4.7 |
3.2 |
1.3 |
0.2 |
setosa |
| 3 |
4.6 |
3.1 |
1.5 |
0.2 |
setosa |
| 4 |
5.0 |
3.6 |
1.4 |
0.2 |
setosa |
array(['setosa', 'versicolor', 'virginica'], dtype=object)
1
2
3
4
5
6
7
8
|
alt.Chart(iris).mark_circle().encode(
x='sepalWidth',
y='petalWidth',
color='species',
).properties(
width=300,
height=300
)
|
1
2
3
4
5
6
7
8
9
|
iris = data.iris()
chart = alt.Chart(iris).mark_point().encode(
x='petalLength',
y='sepalWidth',
color='species'
)
chart | chart.encode(x='sepalLength') | chart.encode(y='petalWidth')
|